### Code for: Spatial-temporal clustering analysis of yaws on Lihir Island, Papua New Guinea
### to enhance planning and implementation of eradication programs
### Eric Q. Mooring, Oriol Mitja, and Megan B. Murray, 2018
### Code 2 of 3
### Spatial clustering of actively detected yaws cases on Lihir Island, PNG ###

# Ensure that the package "rsatscan" is installed.
# Ensure that SaTScan is installed.
# Make a directory called "SaTScan_Files" to put the input and output files for SaTScan

sslocdir<-"C:/Program Files/SaTScan" # Update name of directory where SaTScan is located

library(rsatscan)

# Load data on serologically confirmed cases and number screened and data on PCR confirmed cases and number screened
# Use a modified version of the file where the order of the villages corresponds to the shape file
# Also load village distances data

R1R7SeroScreen<-read.csv(file="R1R7SeroActiveScreen.csv") # load the data on serologically confirmed cases of active yaws
R3R7PCRScreen<-read.csv(file="R3R7PCRActiveScreen.csv") # load the data on PCR-confirmed cases of active yaws
VillageDistances<-read.csv(file="LihirVillageRoadDistances.csv") # load the village distances data

setwd(dir="./SaTScan_Files/")
dir<-getwd()

### Importantly, data are available from different villages in different rounds, but this is the same whether looking at sero-confirmed or PCR-confirmed

### First, define the indices of the villages used for each round

R1_villages<-which(!(R1R7SeroScreen$Location %in% c("Samo_(1&2)", "Putput_(1&2)"))) # Drop combined Samo and combined Putput
R2_villages<-which(!(R1R7SeroScreen$Location %in% c("Samo_1", "Samo_2", "Putput_(1&2)"))) # Drop separate Samo 1 and Samo 2 and combined Putput
R3_villages<-which(!(R1R7SeroScreen$Location %in% c("Samo_(1&2)", "Putput_(1&2)"))) # Drop combined Samo and combined Putput
R4_villages<-which(!(R1R7SeroScreen$Location %in% c("Kapit", "Lataul", "Lienbil", "Samo_1", "Samo_2", "Talies", "Putput_(1&2)"))) # Drop Kapit, Lataul, Lienbil, Samo 1, Samo 2, Talies, and combined Putput
R5_villages<-which(!(R1R7SeroScreen$Location %in% c("Samo_1", "Samo_2", "Putput_(1&2)"))) # Drop separate Samo 1 and Samo 2 and combined Putput
R6_villages<-which(!(R1R7SeroScreen$Location %in% c("Kapit", "Kosmaiyun", "Sale", "Samo_(1&2)", "Putput_(1&2)"))) # Drop Kapit, Kosmaiyun, Sale, combined Samo and combined Putput
R7_villages<-which(!(R1R7SeroScreen$Location %in% c("Kunaye_1", "Kunaye_2", "Samo_1", "Samo_2", "Putput_1", "Putput_2"))) # Drop Kunaye 1 and 2, Putput 1 and 2, and Samo 1 and 2

### Make dataframes of the number of individuals screened in each round

R1CoverageActive<-data.frame("Location"=R1R7SeroScreen$Location[R1_villages],rep(1,length(R1_villages)),R1R7SeroScreen$ScreenRegR1[R1_villages]) # the 1s are a dummy year
R2CoverageActive<-data.frame("Location"=R1R7SeroScreen$Location[R2_villages],rep(1,length(R2_villages)),R1R7SeroScreen$ScreenRegR2[R2_villages]) # the 1s are a dummy year
R3CoverageActive<-data.frame("Location"=R1R7SeroScreen$Location[R3_villages],rep(1,length(R3_villages)),R1R7SeroScreen$ScreenSumR3[R3_villages]) # the 1s are a dummy year
R4CoverageActive<-data.frame("Location"=R1R7SeroScreen$Location[R4_villages],rep(1,length(R4_villages)),R1R7SeroScreen$ScreenSynR4[R4_villages]) # the 1s are a dummy year
R5CoverageActive<-data.frame("Location"=R1R7SeroScreen$Location[R5_villages],rep(1,length(R5_villages)),R1R7SeroScreen$ScreenSumR5[R5_villages]) # the 1s are a dummy year
R6CoverageActive<-data.frame("Location"=R1R7SeroScreen$Location[R6_villages],rep(1,length(R6_villages)),R1R7SeroScreen$ScreenTalR6[R6_villages]) # the 1s are a dummy year
R7CoverageActive<-data.frame("Location"=R1R7SeroScreen$Location[R7_villages],rep(1,length(R7_villages)),R1R7SeroScreen$ScreenTalR7[R7_villages]) # the 1s are a dummy year

### Save these dataframes as SaTScan population files

write.pop(x=R1CoverageActive,filename="CoverageActive_R1",location=dir)
write.pop(x=R2CoverageActive,filename="CoverageActive_R2",location=dir)
write.pop(x=R3CoverageActive,filename="CoverageActive_R3",location=dir)
write.pop(x=R4CoverageActive,filename="CoverageActive_R4",location=dir)
write.pop(x=R5CoverageActive,filename="CoverageActive_R5",location=dir)
write.pop(x=R6CoverageActive,filename="CoverageActive_R6",location=dir)
write.pop(x=R7CoverageActive,filename="CoverageActive_R7",location=dir)

### Prepare non-Euclidean neighbors matrices for each round, restricted just to the villages for which in that round there are data

VillagesIn_R1<-which((VillageDistances$FromVillage %in% R1R7SeroScreen$Location[R1_villages]) & (VillageDistances$ToVillage %in% R1R7SeroScreen$Location[R1_villages]))
VillageDistances_R1<-VillageDistances[VillagesIn_R1,]
NonEuclideanNeighbors_R1<-matrix(data=VillageDistances_R1$ToVillage,nrow=27,ncol=27,byrow=T) # make matrix, which has a column for each location
NonEuclideanNeighbors_R1<-as.data.frame(NonEuclideanNeighbors_R1) # save matrix as dataframe (because of what rsatscan requires)
# the row names in the dataframe look (and are) redundant, but that's okay, because the default of write.nbr is to ignore row names
write.nbr(x = NonEuclideanNeighbors_R1, filename = "Lihir_Active_R1", location = dir)

VillagesIn_R2<-which((VillageDistances$FromVillage %in% R1R7SeroScreen$Location[R2_villages]) & (VillageDistances$ToVillage %in% R1R7SeroScreen$Location[R2_villages]))
VillageDistances_R2<-VillageDistances[VillagesIn_R2,]
NonEuclideanNeighbors_R2<-matrix(data=VillageDistances_R2$ToVillage,nrow=26,ncol=26,byrow=T) # make matrix, which has a column for each location
NonEuclideanNeighbors_R2<-as.data.frame(NonEuclideanNeighbors_R2) # save matrix as dataframe (because of what rsatscan requires)
# the row names in the dataframe look (and are) redundant, but that's okay, because the default of write.nbr is to ignore row names
write.nbr(x = NonEuclideanNeighbors_R2, filename = "Lihir_Active_R2", location = dir)

VillagesIn_R3<-which((VillageDistances$FromVillage %in% R1R7SeroScreen$Location[R3_villages]) & (VillageDistances$ToVillage %in% R1R7SeroScreen$Location[R3_villages]))
VillageDistances_R3<-VillageDistances[VillagesIn_R3,]
NonEuclideanNeighbors_R3<-matrix(data=VillageDistances_R3$ToVillage,nrow=27,ncol=27,byrow=T) # make matrix, which has a column for each location
NonEuclideanNeighbors_R3<-as.data.frame(NonEuclideanNeighbors_R3) # save matrix as dataframe (because of what rsatscan requires)
# the row names in the dataframe look (and are) redundant, but that's okay, because the default of write.nbr is to ignore row names
write.nbr(x = NonEuclideanNeighbors_R3, filename = "Lihir_Active_R3", location = dir)

VillagesIn_R4<-which((VillageDistances$FromVillage %in% R1R7SeroScreen$Location[R4_villages]) & (VillageDistances$ToVillage %in% R1R7SeroScreen$Location[R4_villages]))
VillageDistances_R4<-VillageDistances[VillagesIn_R4,]
NonEuclideanNeighbors_R4<-matrix(data=VillageDistances_R4$ToVillage,nrow=22,ncol=22,byrow=T) # make matrix, which has a column for each location
NonEuclideanNeighbors_R4<-as.data.frame(NonEuclideanNeighbors_R4) # save matrix as dataframe (because of what rsatscan requires)
# the row names in the dataframe look (and are) redundant, but that's okay, because the default of write.nbr is to ignore row names
write.nbr(x = NonEuclideanNeighbors_R4, filename = "Lihir_Active_R4", location = dir)

VillagesIn_R5<-which((VillageDistances$FromVillage %in% R1R7SeroScreen$Location[R5_villages]) & (VillageDistances$ToVillage %in% R1R7SeroScreen$Location[R5_villages]))
VillageDistances_R5<-VillageDistances[VillagesIn_R5,]
NonEuclideanNeighbors_R5<-matrix(data=VillageDistances_R5$ToVillage,nrow=26,ncol=26,byrow=T) # make matrix, which has a column for each location
NonEuclideanNeighbors_R5<-as.data.frame(NonEuclideanNeighbors_R5) # save matrix as dataframe (because of what rsatscan requires)
# the row names in the dataframe look (and are) redundant, but that's okay, because the default of write.nbr is to ignore row names
write.nbr(x = NonEuclideanNeighbors_R5, filename = "Lihir_Active_R5", location = dir)

VillagesIn_R6<-which((VillageDistances$FromVillage %in% R1R7SeroScreen$Location[R6_villages]) & (VillageDistances$ToVillage %in% R1R7SeroScreen$Location[R6_villages]))
VillageDistances_R6<-VillageDistances[VillagesIn_R6,]
NonEuclideanNeighbors_R6<-matrix(data=VillageDistances_R6$ToVillage,nrow=24,ncol=24,byrow=T) # make matrix, which has a column for each location
NonEuclideanNeighbors_R6<-as.data.frame(NonEuclideanNeighbors_R6) # save matrix as dataframe (because of what rsatscan requires)
# the row names in the dataframe look (and are) redundant, but that's okay, because the default of write.nbr is to ignore row names
write.nbr(x = NonEuclideanNeighbors_R6, filename = "Lihir_Active_R6", location = dir)

VillagesIn_R7<-which((VillageDistances$FromVillage %in% R1R7SeroScreen$Location[R7_villages]) & (VillageDistances$ToVillage %in% R1R7SeroScreen$Location[R7_villages]))
VillageDistances_R7<-VillageDistances[VillagesIn_R7,]
NonEuclideanNeighbors_R7<-matrix(data=VillageDistances_R7$ToVillage,nrow=23,ncol=23,byrow=T) # make matrix, which has a column for each location
NonEuclideanNeighbors_R7<-as.data.frame(NonEuclideanNeighbors_R7) # save matrix as dataframe (because of what rsatscan requires)
# the row names in the dataframe look (and are) redundant, but that's okay, because the default of write.nbr is to ignore row names
write.nbr(x = NonEuclideanNeighbors_R7, filename = "Lihir_Active_R7", location = dir)

### Make dataframes for each round of the number of serologically confirmed cases in each village

SeroR1ActiveCases<-data.frame("Location"=R1R7SeroScreen$Location[R1_villages],R1R7SeroScreen$SeroR1[R1_villages])
SeroR2ActiveCases<-data.frame("Location"=R1R7SeroScreen$Location[R2_villages],R1R7SeroScreen$SeroR2[R2_villages])
SeroR3ActiveCases<-data.frame("Location"=R1R7SeroScreen$Location[R3_villages],R1R7SeroScreen$SeroR3[R3_villages])
SeroR4ActiveCases<-data.frame("Location"=R1R7SeroScreen$Location[R4_villages],R1R7SeroScreen$SeroR4[R4_villages])
SeroR5ActiveCases<-data.frame("Location"=R1R7SeroScreen$Location[R5_villages],R1R7SeroScreen$SeroR5[R5_villages])
SeroR6ActiveCases<-data.frame("Location"=R1R7SeroScreen$Location[R6_villages],R1R7SeroScreen$SeroR6[R6_villages])
SeroR7ActiveCases<-data.frame("Location"=R1R7SeroScreen$Location[R7_villages],R1R7SeroScreen$SeroR7[R7_villages])

### Save these dataframes as SaTScan case files

write.cas(x=SeroR1ActiveCases,filename="ActiveCases_Sero_R1",location=dir)
write.cas(x=SeroR2ActiveCases,filename="ActiveCases_Sero_R2",location=dir)
write.cas(x=SeroR3ActiveCases,filename="ActiveCases_Sero_R3",location=dir)
write.cas(x=SeroR4ActiveCases,filename="ActiveCases_Sero_R4",location=dir)
write.cas(x=SeroR5ActiveCases,filename="ActiveCases_Sero_R5",location=dir)
write.cas(x=SeroR6ActiveCases,filename="ActiveCases_Sero_R6",location=dir)
write.cas(x=SeroR7ActiveCases,filename="ActiveCases_Sero_R7",location=dir)

### Make dataframes for each round of the number of PCR-confirmed cases in each village

PCRR3ActiveCases<-data.frame("Location"=R3R7PCRScreen$Location[R3_villages],R3R7PCRScreen$PCRR3[R3_villages])
PCRR4ActiveCases<-data.frame("Location"=R3R7PCRScreen$Location[R4_villages],R3R7PCRScreen$PCRR4[R4_villages])
PCRR5ActiveCases<-data.frame("Location"=R3R7PCRScreen$Location[R5_villages],R3R7PCRScreen$PCRR5[R5_villages])
PCRR6ActiveCases<-data.frame("Location"=R3R7PCRScreen$Location[R6_villages],R3R7PCRScreen$PCRR6[R6_villages])
PCRR7ActiveCases<-data.frame("Location"=R3R7PCRScreen$Location[R7_villages],R3R7PCRScreen$PCRR7[R7_villages])

### Save these dataframes as SaTScan case files

write.cas(x=PCRR3ActiveCases,filename="ActiveCases_PCR_R3",location=dir)
write.cas(x=PCRR4ActiveCases,filename="ActiveCases_PCR_R4",location=dir)
write.cas(x=PCRR5ActiveCases,filename="ActiveCases_PCR_R5",location=dir)
write.cas(x=PCRR6ActiveCases,filename="ActiveCases_PCR_R6",location=dir)
write.cas(x=PCRR7ActiveCases,filename="ActiveCases_PCR_R7",location=dir)


### Run SaTScan analyses ###

# Give name of directory where SaTScan is located
sslocdir<-"C:/Program Files/SaTScan"

# Define settings that will be used repeatedly

invisible(ss.options(reset=TRUE)) # reset satscan options to defaults
ss.options(invals=list(PrecisionCaseTimes=0, # this is necessary because otherwise SatScan will demand a date, despite being a purely spatial analysis
                       AnalysisType=1, # purely spatial
                       ModelType=0, # discrete Poisson 
                       UseNeighborsFile='y', # we will use neighbors file
                       CoordinatesType=0,
                       IncludePurelySpatial="y",
                       OutputShapefiles="n",
                       MostLikelyClusterEachCentroidDBase="n",
                       MostLikelyClusterCaseInfoEachCentroidDBase="n",
                       CensusAreasReportedClustersDBase="n",
                       IncludeRelativeRisksCensusAreasDBase="n",
                       SaveSimLLRsDBase="n",
                       LaunchKMLViewer="n", # don't launch KML viewer
                       IncludeClusterLocationsKML="n",
                       CriteriaForReportingSecondaryClusters=0, # no geographic overlap of reported clusters
                       ReportClusterRank="y" # reports rank of clusters in Monte Carlo reps
))

## Run main analysis on serologically confirmed cases for rounds 1 through 7 ##

# Discrete Poisson analysis: Round 1 serologically confirmed cases

ss.options(invals=list(CaseFile="ActiveCases_Sero_R1.cas", # identify case file
                       PopulationFile="CoverageActive_R1.pop", # identify "population" file
                       NeighborsFilename="Lihir_Active_R1.nbr" # identify neighbors file
))
write.ss.prm(location=dir, filename="ActiveCases_Sero_R1")
satscan(prmlocation=dir, prmfilename="ActiveCases_Sero_R1", sslocation = sslocdir, cleanup=F)


# Discrete Poisson analysis: Round 2 serologically confirmed cases

ss.options(invals=list(CaseFile="ActiveCases_Sero_R2.cas", # identify case file
                       PopulationFile="CoverageActive_R2.pop", # identify "population" file
                       NeighborsFilename="Lihir_Active_R2.nbr" # identify neighbors file
))
write.ss.prm(location=dir, filename="ActiveCases_Sero_R2")
satscan(prmlocation=dir, prmfilename="ActiveCases_Sero_R2", sslocation = sslocdir, cleanup=F)


# Discrete Poisson analysis: Round 3 serologically confirmed cases

ss.options(invals=list(CaseFile="ActiveCases_Sero_R3.cas", # identify case file
                       PopulationFile="CoverageActive_R3.pop", # identify "population" file
                       NeighborsFilename="Lihir_Active_R3.nbr" # identify neighbors file
))
write.ss.prm(location=dir, filename="ActiveCases_Sero_R3")
satscan(prmlocation=dir, prmfilename="ActiveCases_Sero_R3", sslocation = sslocdir, cleanup=F)


# Discrete Poisson analysis: Round 4 serologically confirmed cases

ss.options(invals=list(CaseFile="ActiveCases_Sero_R4.cas", # identify case file
                       PopulationFile="CoverageActive_R4.pop", # identify "population" file
                       NeighborsFilename="Lihir_Active_R4.nbr" # identify neighbors file
))
write.ss.prm(location=dir, filename="ActiveCases_Sero_R4")
satscan(prmlocation=dir, prmfilename="ActiveCases_Sero_R4", sslocation = sslocdir, cleanup=F)


# Discrete Poisson analysis: Round 5 serologically confirmed cases

ss.options(invals=list(CaseFile="ActiveCases_Sero_R5.cas", # identify case file
                       PopulationFile="CoverageActive_R5.pop", # identify "population" file
                       NeighborsFilename="Lihir_Active_R5.nbr" # identify neighbors file
))
write.ss.prm(location=dir, filename="ActiveCases_Sero_R5")
satscan(prmlocation=dir, prmfilename="ActiveCases_Sero_R5", sslocation = sslocdir, cleanup=F)


# Discrete Poisson analysis: Round 6 serologically confirmed cases

ss.options(invals=list(CaseFile="ActiveCases_Sero_R6.cas", # identify case file
                       PopulationFile="CoverageActive_R6.pop", # identify "population" file
                       NeighborsFilename="Lihir_Active_R6.nbr" # identify neighbors file
))
write.ss.prm(location=dir, filename="ActiveCases_Sero_R6")
satscan(prmlocation=dir, prmfilename="ActiveCases_Sero_R6", sslocation = sslocdir, cleanup=F)


# Discrete Poisson analysis: Round 7 serologically confirmed cases

ss.options(invals=list(CaseFile="ActiveCases_Sero_R7.cas", # identify case file
                       PopulationFile="CoverageActive_R7.pop", # identify "population" file
                       NeighborsFilename="Lihir_Active_R7.nbr" # identify neighbors file
))
write.ss.prm(location=dir, filename="ActiveCases_Sero_R7")
satscan(prmlocation=dir, prmfilename="ActiveCases_Sero_R7", sslocation = sslocdir, cleanup=F)


## Run main analysis on PCR-confirmed cases for rounds 3 through 7 ##


# Discrete Poisson analysis: Round 3 PCR-confirmed cases

ss.options(invals=list(CaseFile="ActiveCases_PCR_R3.cas", # identify case file
                       PopulationFile="CoverageActive_R3.pop", # identify "population" file
                       NeighborsFilename="Lihir_Active_R3.nbr" # identify neighbors file
))
write.ss.prm(location=dir, filename="ActiveCases_PCR_R3")
satscan(prmlocation=dir, prmfilename="ActiveCases_PCR_R3", sslocation = sslocdir, cleanup=F)


# Discrete Poisson analysis: Round 4 PCR-confirmed cases

ss.options(invals=list(CaseFile="ActiveCases_PCR_R4.cas", # identify case file
                       PopulationFile="CoverageActive_R4.pop", # identify "population" file
                       NeighborsFilename="Lihir_Active_R4.nbr" # identify neighbors file
))
write.ss.prm(location=dir, filename="ActiveCases_PCR_R4")
satscan(prmlocation=dir, prmfilename="ActiveCases_PCR_R4", sslocation = sslocdir, cleanup=F)


# Discrete Poisson analysis: Round 5 PCR-confirmed cases

ss.options(invals=list(CaseFile="ActiveCases_PCR_R5.cas", # identify case file
                       PopulationFile="CoverageActive_R5.pop", # identify "population" file
                       NeighborsFilename="Lihir_Active_R5.nbr" # identify neighbors file
))
write.ss.prm(location=dir, filename="ActiveCases_PCR_R5")
satscan(prmlocation=dir, prmfilename="ActiveCases_PCR_R5", sslocation = sslocdir, cleanup=F)


# Discrete Poisson analysis: Round 6 serologically confirmed cases

ss.options(invals=list(CaseFile="ActiveCases_PCR_R6.cas", # identify case file
                       PopulationFile="CoverageActive_R6.pop", # identify "population" file
                       NeighborsFilename="Lihir_Active_R6.nbr" # identify neighbors file
))
write.ss.prm(location=dir, filename="ActiveCases_PCR_R6")
satscan(prmlocation=dir, prmfilename="ActiveCases_PCR_R6", sslocation = sslocdir, cleanup=F)


# Discrete Poisson analysis: Round 7 serologically confirmed cases

ss.options(invals=list(CaseFile="ActiveCases_PCR_R7.cas", # identify case file
                       PopulationFile="CoverageActive_R7.pop", # identify "population" file
                       NeighborsFilename="Lihir_Active_R7.nbr" # identify neighbors file
))
write.ss.prm(location=dir, filename="ActiveCases_PCR_R7")
satscan(prmlocation=dir, prmfilename="ActiveCases_PCR_R7", sslocation = sslocdir, cleanup=F)